home *** CD-ROM | disk | FTP | other *** search
/ Visual Cafe 3 / Visual Cafe 3.ISO / Vcafe / Main.bin / CollationRules.java < prev    next >
Text File  |  1998-09-22  |  12KB  |  254 lines

  1. /*
  2.  * @(#)CollationRules.java    1.16 98/07/07
  3.  *
  4.  * (C) Copyright Taligent, Inc. 1996,1997 - All Rights Reserved
  5.  * (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved
  6.  *
  7.  * Portions copyright (c) 1996 Sun Microsystems, Inc. All Rights Reserved.
  8.  *
  9.  *   The original version of this source code and documentation is copyrighted
  10.  * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  11.  * materials are provided under terms of a License Agreement between Taligent
  12.  * and Sun. This technology is protected by multiple US and International
  13.  * patents. This notice and attribution to Taligent may not be removed.
  14.  *   Taligent is a registered trademark of Taligent, Inc.
  15.  *
  16.  * Permission to use, copy, modify, and distribute this software
  17.  * and its documentation for NON-COMMERCIAL purposes and without
  18.  * fee is hereby granted provided that this copyright notice
  19.  * appears in all copies. Please refer to the file "copyright.html"
  20.  * for further important copyright and licensing information.
  21.  *
  22.  * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
  23.  * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
  24.  * TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
  25.  * PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR
  26.  * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
  27.  * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
  28.  *
  29.  */
  30.  
  31. package java.text;
  32. /**
  33.  * CollationRules contains the default en_US collation rules as a base
  34.  * for building other collation tables.
  35.  * <p>Note that decompositions are done before these rules are used,
  36.  * so they do not have to contain accented characters, such as A-grave.
  37.  * @see                RuleBasedCollator
  38.  * @see                LocaleElements
  39.  * @version            1.16 07/07/98
  40.  * @author             Helena Shih, Mark Davis
  41.  */
  42. final class CollationRules {
  43.     static String DEFAULTRULES = new String(
  44.         "" // no FRENCH accent order by default, add in French Delta
  45.         // IGNORABLES (up to first < character)
  46.         // COMPLETELY IGNORE format characters
  47.         + "='\u200B'=\u200C=\u200D=\u200E=\u200F"
  48.         // Control Characters
  49.         + "=\u0000 =\u0001 =\u0002 =\u0003 =\u0004" //null, .. eot
  50.         + "=\u0005 =\u0006 =\u0007 =\u0008 ='\u0009'" //enq, ...
  51.         + "='\u000b' =\u000e" //vt,, so
  52.         + "=\u000f ='\u0010' =\u0011 =\u0012 =\u0013" //si, dle, dc1, dc2, dc3
  53.         + "=\u0014 =\u0015 =\u0016 =\u0017 =\u0018" //dc4, nak, syn, etb, can
  54.         + "=\u0019 =\u001a =\u001b =\u001c =\u001d" //em, sub, esc, fs, gs
  55.         + "=\u001e =\u001f =\u007f"                   //rs, us, del
  56.         //....then the C1 Latin 1 reserved control codes
  57.         + "=\u0080 =\u0081 =\u0082 =\u0083 =\u0084 =\u0085"
  58.         + "=\u0086 =\u0087 =\u0088 =\u0089 =\u008a =\u008b"
  59.         + "=\u008c =\u008d =\u008e =\u008f =\u0090 =\u0091"
  60.         + "=\u0092 =\u0093 =\u0094 =\u0095 =\u0096 =\u0097"
  61.         + "=\u0098 =\u0099 =\u009a =\u009b =\u009c =\u009d"
  62.         + "=\u009e =\u009f"
  63.         // IGNORE except for secondary, tertiary difference
  64.         // Spaces
  65.         + ";'\u0020';'\u00A0'"                  // spaces
  66.         + ";'\u2000';'\u2001';'\u2002';'\u2003';'\u2004'"  // spaces
  67.         + ";'\u2005';'\u2006';'\u2007';'\u2008';'\u2009'"  // spaces
  68.         + ";'\u200A';'\u3000';'\uFEFF'"                // spaces
  69.         + ";'\r' ;'\t' ;'\n';'\f';'\u000b'"  // whitespace
  70.  
  71.         // Non-spacing accents
  72.  
  73.         + ";\u0301"          // non-spacing acute accent
  74.         + ";\u0300"          // non-spacing grave accent
  75.         + ";\u0306"          // non-spacing breve accent
  76.         + ";\u0302"          // non-spacing circumflex accent
  77.         + ";\u030c"          // non-spacing caron/hacek accent
  78.         + ";\u030a"          // non-spacing ring above accent
  79.         + ";\u030d"          // non-spacing vertical line above
  80.         + ";\u0308"          // non-spacing diaeresis accent
  81.         + ";\u030b"          // non-spacing double acute accent
  82.         + ";\u0303"          // non-spacing tilde accent
  83.         + ";\u0307"          // non-spacing dot above/overdot accent
  84.         + ";\u0304"          // non-spacing macron accent
  85.         + ";\u0337"          // non-spacing short slash overlay (overstruck diacritic)
  86.         + ";\u0327"          // non-spacing cedilla accent
  87.         + ";\u0328"          // non-spacing ogonek accent
  88.         + ";\u0323"          // non-spacing dot-below/underdot accent
  89.         + ";\u0332"          // non-spacing underscore/underline accent
  90.         // with the rest of the general diacritical marks in binary order
  91.         + ";\u0305"          // non-spacing overscore/overline
  92.         + ";\u0309"          // non-spacing hook above
  93.         + ";\u030e"          // non-spacing double vertical line above
  94.         + ";\u030f"          // non-spacing double grave
  95.         + ";\u0310"          // non-spacing chandrabindu
  96.         + ";\u0311"          // non-spacing inverted breve
  97.         + ";\u0312"          // non-spacing turned comma above/cedilla above
  98.         + ";\u0313"          // non-spacing comma above
  99.         + ";\u0314"          // non-spacing reversed comma above
  100.         + ";\u0315"          // non-spacing comma above right
  101.         + ";\u0316"          // non-spacing grave below
  102.         + ";\u0317"          // non-spacing acute below
  103.         + ";\u0318"          // non-spacing left tack below
  104.         + ";\u0319"          // non-spacing tack below
  105.         + ";\u031a"          // non-spacing left angle above
  106.         + ";\u031b"          // non-spacing horn
  107.         + ";\u031c"          // non-spacing left half ring below
  108.         + ";\u031d"          // non-spacing up tack below
  109.         + ";\u031e"          // non-spacing down tack below
  110.         + ";\u031f"          // non-spacing plus sign below
  111.         + ";\u0320"          // non-spacing minus sign below
  112.         + ";\u0321"          // non-spacing palatalized hook below
  113.         + ";\u0322"          // non-spacing retroflex hook below
  114.         + ";\u0324"          // non-spacing double dot below
  115.         + ";\u0325"          // non-spacing ring below
  116.         + ";\u0326"          // non-spacing comma below
  117.         + ";\u0329"          // non-spacing vertical line below
  118.         + ";\u032a"          // non-spacing bridge below
  119.         + ";\u032b"          // non-spacing inverted double arch below
  120.         + ";\u032c"          // non-spacing hacek below
  121.         + ";\u032d"          // non-spacing circumflex below
  122.         + ";\u032e"          // non-spacing breve below
  123.         + ";\u032f"          // non-spacing inverted breve below
  124.         + ";\u0330"          // non-spacing tilde below
  125.         + ";\u0331"          // non-spacing macron below
  126.         + ";\u0333"          // non-spacing double underscore
  127.         + ";\u0334"          // non-spacing tilde overlay
  128.         + ";\u0335"          // non-spacing short bar overlay
  129.         + ";\u0336"          // non-spacing long bar overlay
  130.         + ";\u0338"          // non-spacing long slash overlay
  131.         + ";\u0339"          // non-spacing right half ring below
  132.         + ";\u033a"          // non-spacing inverted bridge below
  133.         + ";\u033b"          // non-spacing square below
  134.         + ";\u033c"          // non-spacing seagull below
  135.         + ";\u033d"          // non-spacing x above
  136.         + ";\u033e"          // non-spacing vertical tilde
  137.         + ";\u033f"          // non-spacing double overscore
  138.         + ";\u0340"          // non-spacing grave tone mark
  139.         + ";\u0341"          // non-spacing acute tone mark
  140.         + ";\u0342;\u0343;\u0344;\u0345;\u0360;\u0361"    // newer
  141.         + ";\u0483;\u0484;\u0485;\u0486"    // Cyrillic accents
  142.  
  143.         + ";\u20D0;\u20D1;\u20D2"           // symbol accents
  144.         + ";\u20D3;\u20D4;\u20D5"           // symbol accents
  145.         + ";\u20D6;\u20D7;\u20D8"           // symbol accents
  146.         + ";\u20D9;\u20DA;\u20DB"           // symbol accents
  147.         + ";\u20DC;\u20DD;\u20DE"           // symbol accents
  148.         + ";\u20DF;\u20E0;\u20E1"           // symbol accents
  149.  
  150.         + ",'\u002D';\u00AD"                // dashes
  151.         + ";\u2010;\u2011;\u2012"           // dashes
  152.         + ";\u2013;\u2014;\u2015"           // dashes
  153.         + ";\u2212"                         // dashes
  154.  
  155.         // other punctuation
  156.  
  157.         + "<'\u005f'"        // underline/underscore (spacing)
  158.         + "<\u00af"          // overline or macron (spacing)
  159.         + "<'\u002c'"        // comma (spacing)
  160.         + "<'\u003b'"        // semicolon
  161.         + "<'\u003a'"        // colon
  162.         + "<'\u0021'"        // exclamation point
  163.         + "<\u00a1"          // inverted exclamation point
  164.         + "<'\u003f'"        // question mark
  165.         + "<\u00bf"          // inverted question mark
  166.         + "<'\u002f'"        // slash
  167.         + "<'\u002e'"        // period/full stop
  168.         + "<\u00b4"          // acute accent (spacing)
  169.         + "<'\u0060'"        // grave accent (spacing)
  170.         + "<'\u005e'"        // circumflex accent (spacing)
  171.         + "<\u00a8"          // diaresis/umlaut accent (spacing)
  172.         + "<'\u007e'"        // tilde accent (spacing)
  173.         + "<\u00b7"          // middle dot (spacing)
  174.         + "<\u00b8"          // cedilla accent (spacing)
  175.         + "<'\u0027'"        // apostrophe
  176.         + "<'\"'"            // quotation marks
  177.         + "<\u00ab"          // left angle quotes
  178.         + "<\u00bb"          // right angle quotes
  179.         + "<'\u0028'"        // left parenthesis
  180.         + "<'\u0029'"        // right parenthesis
  181.         + "<'\u005b'"        // left bracket
  182.         + "<'\u005d'"        // right bracket
  183.         + "<'\u007b'"        // left brace
  184.         + "<'\u007d'"        // right brace
  185.         + "<\u00a7"          // section symbol
  186.         + "<\u00b6"          // paragraph symbol
  187.         + "<\u00a9"          // copyright symbol
  188.         + "<\u00ae"          // registered trademark symbol
  189.         + "<'\u0040'"          // at sign
  190.         + "<\u00a4"          // international currency symbol
  191.         + "<\u00a2"          // cent sign
  192.         + "<'\u0024'"        // dollar sign
  193.         + "<\u20ac"          // euro sign
  194.         + "<\u00a3"          // pound-sterling sign
  195.         + "<\u00a5"          // yen sign
  196.         + "<'\u002a'"        // asterisk
  197.         + "<'\\'"            // backslash
  198.         + "<'\u0026'"        // ampersand
  199.         + "<'\u0023'"        // number sign
  200.         + "<'\u0025'"        // percent sign
  201.         + "<'\u002b'"        // plus sign
  202.         + "<\u00b1"          // plus-or-minus sign
  203.         + "<\u00f7"          // divide sign
  204.         + "<\u00d7"          // multiply sign
  205.         + "<'\u003c'"        // less-than sign
  206.         + "<'\u003d'"        // equal sign
  207.         + "<'\u003e'"        // greater-than sign
  208.         + "<\u00ac"          // end of line symbol/logical NOT symbol
  209.         + "<'\u007c'"          // vertical line/logical OR symbol
  210.         + "<\u00a6"          // broken vertical line
  211.         + "<\u00b0"          // degree symbol
  212.         + "<\u00b5"          // micro symbol
  213.  
  214.         // NUMERICS
  215.  
  216.         + "<0<1<2<3<4<5<6<7<8<9"
  217.         + "<\u00bc<\u00bd<\u00be"   // 1/4,1/2,3/4 fractions
  218.  
  219.         // NON-IGNORABLES
  220.         + "<a,A"
  221.         + "<b,B"
  222.         + "<c,C"
  223.         + "<d,D"
  224.         + "<\u00F0,\u00D0"                  // eth
  225.         + "<e,E"
  226.         + "<f,F"
  227.         + "<g,G"
  228.         + "<h,H"
  229.         + "<i,I"
  230.         + "<j,J"
  231.         + "<k,K"
  232.         + "<l,L"
  233.         + "<m,M"
  234.         + "<n,N"
  235.         + "<o,O"
  236.         + "<p,P"
  237.         + "<q,Q"
  238.         + "<r,R"
  239.         + "<s, S & SS,\u00DF"             // s-zet
  240.         + "<t,T"
  241.         + "& TH, \u00DE &TH, \u00FE "     // thorn
  242.         + "<u,U"
  243.         + "<v,V"
  244.         + "<w,W"
  245.         + "<x,X"
  246.         + "<y,Y"
  247.         + "<z,Z"
  248.         + "&AE,\u00C6"                    // ae & AE ligature
  249.         + "&AE,\u00E6"
  250.         + "&OE,\u0152"                    // oe & OE ligature
  251.         + "&OE,\u0153"
  252.     );
  253. }
  254.